# This code is to create data for PSID spell analysis
# Created by Shuyi Qiu
# Generated on Jan. 23rd, 2024
# Last edited on Mar. 2nd, 2024

# 0. Settings ----
set.seed(27705)
setwd("/Users/RachelChiu/Documents/ReProj/PSID/spell")
library(tidyverse)
library(ggplot2)
library(psidread)
library(dplyr)
library(haven)
library(readxl)
library(data.table)
library(Amelia)
library(janitor)
library(mitools)
library(mice)

# 1. Import data ----
psid_varlist <- c(
  ## 1.1 Weight ----
  ### Longitudinal individual weight ----
  "pwt || [99]ER33546 [01]ER33637 [03]ER33740 [05]ER33848 [07]ER33950 
  [09]ER34045 [11]ER34154 [13]ER34268 [15]ER34413 [17]ER34650 [19]ER34863 [21]ER35064",
  ### Longitudinal household weight ----
  "fwt || [99]ER16518 [01]ER20394 [03]ER24179 [05]ER28078 [07]ER41069 
  [09]ER47012 [11]ER52436 [13]ER58257 [15]ER65492 [17]ER71570 [19]ER77631 [21]ER81958",
  ### Why non-response ----
  "why_nonresp || [99]ER33545 [01]ER33636 [03]ER33739 [05]ER33847 [07]ER33949 
  [09]ER34044 [11]ER34153 [13]ER34267 [15]ER34412 [17]ER34649 [19]ER34862 [21]ER35063",
  
  ## 1.2 Individual Info ----
  ### Individual's age (Child's age) ----
  "p_age || [99]ER33504 [01]ER33604 [03]ER33704 [05]ER33804 [07]ER33904 
  [09]ER34004 [11]ER34104 [13]ER34204 [15]ER34305 [17]ER34504 [19]ER34704 [21]ER34904",
  ### Individual's educ (Outcome) ----
  "p_educ || [99]ER33516 [01]ER33616 [03]ER33716 [05]ER33817 [07]ER33917 
  [09]ER34020 [11]ER34119 [13]ER34230 [15]ER34349 [17]ER34548 [19]ER34752 [21]ER34952",
  ### Individual's employment status ----
  "p_employ || [99]ER33512 [01]ER33612 [03]ER33712 [05]ER33813 [07]ER33913 
  [09]ER34016 [11]ER34116 [13]ER34216 [15]ER34317 [17]ER34516 [19]ER34716 [21]ER34916",
  "p_hsged || [13]ER34220 [15]ER34320 [17]ER34519 [19]ER34719 [21]ER34919",
  "p_yrgradhs || [15]ER34322 [17]ER34521 [19]ER34721 [21]ER34921",
  "p_yrgradged || [15]ER34327 [17]ER34526 [19]ER34726 [21]ER34926",

  ## 1.3 Wealth ----
  ### Farm and business ----
  "farmbus_net || [99]S403 [01]S503 [03]S603 [05]S703 [07]S803 [09]ER46938 [11]ER52346",
  "farmbus_wd || [13]ER58155 [15]ER65352 [17]ER71429 [19]ER77451 [21]ER81778",
  ### Real estate ---- 
  "estate_net || [99]S409 [01]S509 [03]S609 [05]S709 [07]S809 [09]ER46950 [11]ER52354",
  "estate_wd || [13]ER58165 [15]ER65362 [17]ER71439 [19]ER77465 [21]ER81792",
  ### Checking and savings ----
  "cksv || [99]S405 [01]S505 [03]S605 [05]S705 [07]S805 [09]ER46942 [11]ER52350 
  [13]ER58161 [15]ER65358 [17]ER71435 [19]ER77457 [21]ER81784",
  ### Bonds ----
  "bonds || [19]ER77461 [21]ER81788",
  ### Stocks ----
  "stocks || [99]S411 [01]S511 [03]S611 [05]S711 [07]S811 [09]ER46954 [11]ER52358 
  [13]ER58171 [15]ER65368 [17]ER71445 [19]ER77471 [21]ER81798",
  ### Vehicles - to be subtracted from household wealth ----
  "vehicles || [99]S413 [01]S513 [03]S613 [05]S713 [07]S813 [09]ER46956 [11]ER52360 
  [13]ER58173 [15]ER65370 [17]ER71447 [19]ER77473 [21]ER81800",
  ### Other assets ----
  "oassets || [99]S415 [01]S515 [03]S615 [05]S715 [07]S815 [09]ER46960 [11]ER52364 
  [13]ER58177 [15]ER65374 [17]ER71451 [19]ER77477 [21]ER81804",
  ### Annuities and IRA ----
  "annuities || [99]S419 [01]S519 [03]S619 [05]S719 [07]S819 [09]ER46964 [11]ER52368 
  [13]ER58181 [15]ER65378 [17]ER71455 [19]ER77481 [21]ER81808",
  
  ## 1.4 Debts ----
  ### Total debts ----
  "total_d || [99]S407 [01]S507 [03]S607 [05]S707 [07]S807 [09]ER46946",
  ### Debts from farm and business ----
  "farmbus_d || [13]ER58157 [15]ER65354 [17]ER71431 [19]ER77453 [21]ER81780",
  ### Debts from real estate ----
  "estate_d || [13]ER58167 [15]ER65364 [17]ER71441 [19]ER77467 [21]ER81794",
  ### Debts from credit cards ----
  "card_d || [11]ER52372 [13]ER58185 [15]ER65382 [17]ER71459 [19]ER77485 [21]ER81812",
  ### Student loans ----
  "student_d || [11]ER52376 [13]ER58189 [15]ER65386 [17]ER71463 [19]ER77489 [21]ER81816",
  ### Medical debts ----
  "med_d || [11]ER52380 [13]ER58193 [15]ER65390 [17]ER71467 [19]ER77493 [21]ER81820",
  ### Legal debts ----
  "legal_d || [11]ER52384 [13]ER58197 [15]ER65394 [17]ER71471 [19]ER77497 [21]ER81824",
  ### Family loans ----
  "family_d || [11]ER52388 [13]ER58201 [15]ER65398 [17]ER71475 [19]ER77501 [21]ER81828",
  ### Other debts ----
  "other_d || [13]ER58205 [15]ER65402 [17]ER71479 [19]ER77505 [21]ER81832",
  
  ## 1.5 Total wealth ----
  ### Wealth without home equity ----
  "nwwoe || [99]S416 [01]S516 [03]S616 [05]S716 [07]S816 [09]ER46968 [11]ER52392 
  [13]ER58209 [15]ER65406 [17]ER71483 [19]ER77509 [21]ER81836",
  ### Home equity ----
  "homevalue || [99]S420 [01]S520 [03]S620 [05]S720 [07]S820 [09]ER46966 [11]ER52390 
  [13]ER58207 [15]ER65404 [17]ER71481 [19]ER77507 [21]ER81834",
  ### Wealth with home equity ----
  "nwwe || [99]S417 [01]S517 [03]S617 [05]S717 [07]S817 [09]ER46970 [11]ER52394 
  [13]ER58211 [15]ER65408 [17]ER71485 [19]ER77511 [21]ER81838",
  
  ## 1.6 Household Info (by year) ----
  ### Number of children----
  "num_child || [99]ER13013 [01]ER17016 [03]ER21020 [05]ER25020 [07]ER36020 [09]ER42020 [11]ER47320 
  [13]ER53020 [15]ER60021 [17]ER66021 [19]ER72021 [21]ER78021",
  ### Family size----
  "famsize || [99]ER13009 [01]ER17012 [03]ER21016 [05]ER25016 [07]ER36016 [09]ER42016 [11]ER47316 
  [13]ER53016 [15]ER60016 [17]ER66016 [19]ER72016 [21]ER78016",
  ### Household income----
  "inc || [99]ER16462 [01]ER20456 [03]ER24099 [05]ER28037 [07]ER41027 [09]ER46935 [11]ER52343 
  [13]ER58152 [15]ER65349 [17]ER71426 [19]ER77448 [21]ER81775",
  ### Head's age ----
  "hh_age || [99]ER13010 [01]ER17013 [03]ER21017 [05]ER25017 [07]ER36017 [09]ER42017 [11]ER47317 
  [13]ER53017 [15]ER60017 [17]ER66017 [19]ER72017 [21]ER78017",
  ### Head's sex ----
  "hh_sex || [99]ER13011 [01]ER17014 [03]ER21018 [05]ER25018 [07]ER36018 [09]ER42018 [11]ER47318 
  [13]ER53018 [15]ER60018 [17]ER66018 [19]ER72018 [21]ER78018",
  ### Head's marital status ----
  "hh_marital || [99]ER16423 [01]ER20369 [03]ER24150 [05]ER28049 [07]ER41039 [09]ER46983 [11]ER52407 
  [13]ER58225 [15]ER65461 [17]ER71540 [19]ER77601 [21]ER81928",
  ### Head's education ----
  "hh_educ || [99]ER16516 [01]ER20457 [03]ER24148 [05]ER28047 [07]ER41037 [09]ER46981 [11]ER52405 
  [13]ER58223 [15]ER65459 [17]ER71538 [19]ER77599 [21]ER81926",
  ### Head's race & ethnicity ----
  "hh_race_1st || [99]ER15928 [01]ER19989 [03]ER23426 [05]ER27393 [07]ER40565 [09]ER46543 [11]ER51904 
  [13]ER57659 [15]ER64810 [17]ER70882 [19]ER76897 [21]ER81144",
  "hh_race_2nd || [99]ER15929 [01]ER19990 [03]ER23427 [05]ER27394 [07]ER40566 [09]ER46544 [11]ER51905 
  [13]ER57660 [15]ER64811 [17]ER70883 [19]ER76898 [21]ER81145",
  "hh_hisp_1st || [05]ER27392 [07]ER40564 [09]ER46542 [11]ER51903 [13]ER57658 [15]ER64809 [17]ER70881 [19]ER76896 [21]ER81143",
  ### Head's health status ----
  "hh_health || [99]ER15447 [01]ER19612 [03]ER23009 [05]ER26990 [07]ER38202 [09]ER44175 [11]ER49494 
  [13]ER55244 [15]ER62366 [17]ER68420 [19]ER74428 [21]ER80550",
  ### Food stamps ----
  "hh_receivefs || [99]ER14255 [01]ER18386 [03]ER21652 [05]ER25654 [07]ER36672 [09]ER42691 [11]ER48007 
  [13]ER53704 [15]ER60719 [17]ER66766 [19]ER72770 [21]ER78847"
)

str_df <- psid_str(varlist = psid_varlist, type = "separated")

psid_indir <- "/Users/RachelChiu/Documents/ReProj/PSID/Data"
psid_df <- psid_read(indir = psid_indir, 
                     str_df = str_df,
                     idvars = c("ER32000","ER32013","ER32020"),
                     type = "package",
                     filename = NA)
df <- psid_reshape(psid_df = psid_df,
                   str_df = str_df,
                   shape = "long",
                   level = "individual")

rm(psid_df) # Space clear

## 1.7 Sample ID ----
sample_id <- df |> 
  filter(p_age >= 1 & p_age <= 5 & (year == 1999 | year == 2001)) |> 
  select(pid, rel2hh, year, p_age) |> 
  group_by(pid) |> 
  slice_min(order_by = year, with_ties = FALSE) |> 
  filter(rel2hh %in% c(30,33,35,37,60,65)) |> 
  mutate(year = as.numeric(year)) |> 
  rename(
    entry_year = year,
    entry_age = p_age
  ) |> 
  mutate(
    exit_year = ifelse((entry_year - entry_age) %% 2 == 0, entry_year - entry_age + 19, entry_year - entry_age + 18),
    exit_age = ifelse((entry_year - entry_age) %% 2 == 0, 19, 18),
    ms_year = ifelse((entry_year - entry_age) %% 2 == 0, entry_year - entry_age + 21, entry_year - entry_age + 20),
    ms_age = ifelse((entry_year - entry_age) %% 2 == 0, 21, 20)
  ) |> 
  rename(rel2hh_entry = rel2hh)

d <- df |> 
  right_join(sample_id, by = "pid") |> 
  mutate(
    fu_year = ifelse(year >= entry_year & year <= exit_year, T, F),
    missed_year = ifelse(fu_year == T & indfid == 0, T, F),
    nonresponse = ifelse(indfid == 0, T, F)
  )

## 1.8 Poverty thresholds & Inflation ----
pov_thresholds <- read_dta("AddData/pov_thresholds.dta")
inflation_df <- read_xlsx("AddData/DollarValueConvertRatio.xlsx")
dollar21 <- as.numeric(inflation_df[which(inflation_df$Year == 2021),"Ratio"])
inflation_df <- inflation_df |> 
  mutate(Ratio21 = Ratio/dollar21)

## 1.9 Household with grandparents ----
gp_df <- df |> 
  select(year, indfid, rel2hh) |> 
  filter(rel2hh %in% c(50, 57, 58, 66, 67, 68, 69, 72, 73)) |> 
  group_by(year, indfid) |> 
  summarise(num_gp = n())
  
## 1.10 First child ----
fc_df <- df |> 
  select(pid, ER32013, ER32020, year) |> 
  mutate(year = as.numeric(year)) |> 
  right_join(sample_id |> select(pid, entry_year), by = c("pid", "year"="entry_year")) |> 
  mutate(
    birth_order = case_when(
      (!is.na(ER32013)) & ER32013 <= 25 ~ ER32013,
      (is.na(ER32013)|(ER32013 > 25)) & (!is.na(ER32020)) & (ER32020 <= 25) ~ ER32020,
      TRUE ~ NA_real_
    ),
    is_first_born = as.factor(ifelse(birth_order == 1, TRUE, FALSE))
  ) |> 
  select(pid, birth_order, is_first_born)
  
# 2. Prepare data for imputation ----
forimp <- d |> 
  filter(year >= entry_year) |> 
  mutate(
    ## 2.1 Wealth Calibration ----
    ### farmbus_net ----
    farmbus_net = ifelse(year >= 2013, farmbus_wd - farmbus_d, farmbus_net),
    ### estate_net ----
    estate_net = ifelse(year >= 2013, estate_wd - estate_d, estate_net),
    ### bonds ----
    # Keep the same
    ### cksv ----
    # Keep the same
    ### stocks ----
    # Keep the same
    ### vehicles ----
    # Keep the same
    ### oassets ----
    # Keep the same
    ### annuities ----
    # Keep the same
    
    ### tassets ----
    tassets = case_when(
      year <= 2017 ~ farmbus_net + estate_net + cksv + stocks + vehicles + oassets + annuities,
      year >= 2019 ~ farmbus_net + estate_net + bonds + cksv + stocks + vehicles + oassets + annuities
    ),
    
    ### tdebts ----
    tdebts = case_when(
      year <= 2009 ~ total_d,
      year == 2011 ~ card_d + student_d + med_d + legal_d + family_d,
      year >= 2013 ~ card_d + student_d + med_d + legal_d + family_d + other_d
    ),
    ### nwwoe ----
    # Keep the same
    ### homevalue ----
    # Keep the same
    ### nwwe ----
    # Keep the same
    
    ## 2.2 Household Info ----
    ### n_children ----
    n_children = ifelse(num_child < 20, num_child, NA),
    ### n_adults ----
    n_adults = ifelse(famsize < 21, famsize - n_children, NA),
    ### inc ----
    # Keep the same
    ### hh_age ----
    hh_age = ifelse(hh_age >= 14 & hh_age <= 120, hh_age, NA),
    hh_age_gt35 = ifelse(hh_age >= 35, T, F),
    ### hh_sex ----
    hh_sex = as.factor(case_when(
      hh_sex == 1 ~ "M",
      hh_sex == 2 ~ "F",
      TRUE ~ NA_character_
    )),
    ### hh_marital ----
    hh_marital = as.factor(case_when(
      hh_marital == 1 ~ "Married",
      hh_marital == 2 ~ "Single",
      hh_marital == 3 ~ "Widowed",
      hh_marital %in% c(4, 5) ~ "Divorced or Separated",
      TRUE ~ NA_character_
    )),
    ### hh_educ ----
    hh_educ = ifelse(hh_educ >= 0 & hh_educ <= 17, hh_educ, NA),
    ### hh_racehisp ----
    #### First mention
    hh_racehisp_1st = case_when(
      hh_race_1st == 1 ~ "White",
      hh_race_1st == 2 ~ "Black",
      hh_race_1st > 2 & hh_race_1st < 8 ~ "Others",
      TRUE ~ NA_character_
    ),
    #### Second mention
    hh_racehisp_2nd = case_when(
      hh_racehisp_1st %in% c("White","Others") & hh_race_2nd == 2  ~ "Black",
      hh_racehisp_1st == "White" & (hh_race_2nd > 2 & hh_race_2nd < 8) ~ "Others",
      TRUE ~ hh_racehisp_1st
    ),
    #### Hispanic
    hh_racehisp = as.factor(ifelse((year < 2005 & (hh_race_1st == 5 | hh_race_2nd == 5)) | (year >= 2005 & hh_hisp_1st >= 1 & hh_hisp_1st <= 7), "Hispanic", hh_racehisp_2nd)),
    ### hh_poorhealth ----
    hh_poorhealth = as.factor(case_when(
      hh_health %in% c(1,2,3) ~ F,
      hh_health %in% c(4,5) ~ T,
      TRUE ~ NA_integer_
    )),
    ### h_foodstamps ----
    h_foodstamps = as.factor(case_when(
      hh_receivefs == 1 ~ T,
      hh_receivefs == 5 ~ F,
      TRUE ~ NA_integer_
    )),
    
    ## 2.3 Individual Info ----
    ### p_gender ----
    p_gender = as.factor(ifelse(ER32000 == 1, "M", "F")),
    ### p_educ ----
    p_educ = ifelse(p_educ >= 1 & p_educ <= 17, p_educ, NA)
  ) |> 
  ### is_first_child ----
  left_join(fc_df, by = "pid") |> 
  left_join(gp_df, by = c("year","indfid")) |> 
  mutate(
    ### h_livewgp ----
    h_livewgp = as.factor(case_when(
      !is.na(num_gp) | rel2hh %in% c(60, 65)  ~ T,
      indfid != 0 & is.na(num_gp) & !rel2hh %in% c(60, 65) ~ F,
      TRUE ~ NA_integer_
    )),
    hh_parents = as.factor(case_when(
      rel2hh %in% c(30, 33, 35) ~ T,
      indfid != 0 & !rel2hh %in% c(30, 33, 35) ~ F,
      TRUE ~ NA_integer_
    )),
    hh_gp = as.factor(case_when(
      rel2hh %in% c(60, 65) ~ T,
      indfid != 0 & !rel2hh %in% c(60, 65) ~ F,
      TRUE ~ NA_integer_
    ))
  )

# temp <- forimp |> 
#   select(pid, year, why_nonresp, indfid, tassets, tdebts, nwwoe, homevalue, nwwe, inc)

## 2.4 Education outcomes ----
cdoc_long <- df |> 
  right_join(sample_id, by = "pid") |> 
  select(pid, year, entry_year, entry_age, exit_year, exit_age, p_age, ms_year, ms_age, p_educ, why_nonresp, p_employ) |> 
  filter(year >= entry_year) |> 
  mutate(
    year = as.numeric(year),
    p_age = as.factor(ifelse(p_age == 0 | p_age > 100, NA, p_age)), 
    p_age_imp = entry_age + year - entry_year,
    p_age_dif = ifelse(p_age != p_age_imp, TRUE, FALSE),
    p_age_comp = ifelse(is.na(p_age), p_age_imp, p_age),
    fake_p_age = ifelse(year == 2021 & p_age_comp < 20, 20, p_age_comp),
    p_age_vs20 = case_when(
      p_age_comp < 19 ~ -2,
      p_age_comp == 19 ~ -1,
      p_age_comp == 20 ~ 0,
      p_age_comp == 21 ~ 1,
      p_age_comp > 21 ~ 2
    ),
    p_educ = ifelse(p_educ > 17 | p_educ == 0, NA, p_educ),
    p_employ = ifelse(p_employ == 9 | p_employ == 0, NA, p_employ),
    p_working = as.factor(case_when(
      p_employ == 1 | p_employ == 2 | why_nonresp == 11 ~ "working",
      p_employ == 7 | why_nonresp == 12 ~ "school",
      p_employ > 2 & p_employ < 9 & p_employ != 7 & why_nonresp != 12 & why_nonresp != 11 ~ "not working nor school",
      TRUE ~ NA_character_))
  ) 

why_nonresp_df <- cdoc_long %>%
  select(pid, year, why_nonresp) %>%
  mutate(year_for_merge = year - 2) %>%
  select(-year)

edu_min_ge12_df <- cdoc_long %>%
  left_join(why_nonresp_df, by = c("pid"="pid", "year" = "year_for_merge"), suffix = c("","_aft")) %>%
  filter(p_educ >= 12) %>%
  group_by(pid) %>%
  slice_min(order_by = p_educ, with_ties = FALSE) %>%
  select(pid, year, p_educ, why_nonresp_aft) %>%
  rename(year_educ_min_ge12 = year,
         p_educ_min_ge12 = p_educ) 

edu_max_lt12_df <- cdoc_long %>%
  filter(p_educ < 12) %>%
  group_by(pid) %>%
  slice_max(order_by = p_educ, with_ties = TRUE) %>%
  mutate(temp_order = row_number()) %>%
  slice_max(order_by = temp_order, with_ties = FALSE) %>%
  select(pid, year, p_educ) %>%
  rename(year_educ_max_lt12 = year,
         p_educ_max_lt12 = p_educ)

edu_min_ge13_df <- cdoc_long %>%
  filter(p_educ >= 13) %>%
  group_by(pid) %>%
  slice_min(order_by = p_educ, with_ties = FALSE) %>%
  select(pid, year, p_educ) %>%
  rename(year_educ_min_ge13 = year,
         p_educ_min_ge13 = p_educ) 

edu_max_lt13_df <- cdoc_long %>%
  filter(p_educ < 13) %>%
  group_by(pid) %>%
  slice_max(order_by = p_educ, with_ties = TRUE) %>%
  mutate(temp_order = row_number()) %>%
  slice_max(order_by = temp_order, with_ties = FALSE) %>%
  select(pid, year, p_educ) %>%
  rename(year_educ_max_lt13 = year,
         p_educ_max_lt13 = p_educ)

edu_df <- cdoc_long %>%
  select(pid, year, p_educ, p_age, p_age_comp, fake_p_age) %>%
  left_join(edu_min_ge12_df, by = "pid") %>%
  left_join(edu_max_lt12_df, by = "pid") %>%
  left_join(edu_min_ge13_df, by = "pid") %>%
  left_join(edu_max_lt13_df, by = "pid") %>%
  mutate(
    hs_completed = case_when(
      !is.na(p_educ_min_ge12) & p_educ_min_ge12 <= 13 & year < year_educ_min_ge12 ~ FALSE,
      !is.na(p_educ_min_ge12) & p_educ_min_ge12 <= 13 & year >= year_educ_min_ge12 ~ TRUE,
      !is.na(p_educ_min_ge12) & p_educ_min_ge12 > 13 & !is.na(p_educ_max_lt12) & year <= year_educ_max_lt12 ~ FALSE,
      !is.na(p_educ_min_ge12) & p_educ_min_ge12 > 13 & !is.na(p_educ_max_lt12) & year >= year_educ_min_ge12 ~ TRUE,
      !is.na(p_educ_min_ge12) & p_educ_min_ge12 > 13 & is.na(p_educ_max_lt12) & year >= year_educ_min_ge12 ~ TRUE,
      #is.na(p_educ_min_ge12) & !is.na(p_educ_min) & (year <= year_educ_min | p_educ == p_educ_min) ~ FALSE,
      #is.na(p_educ_min_ge12) & !is.na(p_educ_min) & year > year_educ_min & p_educ != p_educ_min ~ NA
      is.na(p_educ_min_ge12) & !is.na(p_educ_max_lt12) & year <= year_educ_max_lt12 ~ FALSE,
      TRUE ~ NA
    ),
    clg_enrolled = case_when(
      !is.na(p_educ_min_ge13) & p_educ_min_ge13 == 13 & year < year_educ_min_ge13 ~ FALSE,
      !is.na(p_educ_min_ge13) & p_educ_min_ge13 == 13 & year >= year_educ_min_ge13 ~ TRUE,
      !is.na(p_educ_min_ge13) & p_educ_min_ge13 > 13 & !is.na(p_educ_max_lt13) & year <= year_educ_max_lt13 ~ FALSE,
      !is.na(p_educ_min_ge13) & p_educ_min_ge13 > 13 & !is.na(p_educ_max_lt13) & year >= year_educ_min_ge13 ~ TRUE,
      !is.na(p_educ_min_ge13) & p_educ_min_ge13 > 13 & is.na(p_educ_max_lt13) & year >= year_educ_min_ge13 ~ TRUE,
      is.na(p_educ_min_ge13) & !is.na(p_educ_max_lt13) & p_educ_max_lt13 == 12 & why_nonresp_aft == 12 & year <= year_educ_max_lt13 ~ FALSE,
      is.na(p_educ_min_ge13) & !is.na(p_educ_max_lt13) & p_educ_max_lt13 == 12 & why_nonresp_aft == 12 & year > year_educ_max_lt13 ~ TRUE,
      is.na(p_educ_min_ge13) & !is.na(p_educ_max_lt13) & p_educ_max_lt13 <= 12 & year <= year_educ_max_lt13 ~ FALSE,
      TRUE ~ NA
    )
  ) |>  
  select(pid, year, hs_completed, clg_enrolled)

forimp <- forimp |> 
  mutate(year = as.numeric(year)) |> 
  left_join(edu_df, by = c("pid", "year"))

# 3. Multiple Imputation ----
id_cols <- c("p_gender","why_nonresp","p_age",
              "p_educ", "p_employ", "xsqnr", "rel2hh", "indfid",
             "entry_year", "entry_age", "exit_year", "exit_age", "ms_year", "ms_age",
             "fu_year", "missed_year")
logs_cols <- c("tassets","tdebts","homevalue", "vehicles","inc", "pwt", "hs_completed", "clg_enrolled")
noms_cols <- c("hh_sex",  "hh_poorhealth", "h_foodstamps", "is_first_born","h_livewgp", "hh_parents","hh_gp","hh_marital", "hh_racehisp")
ords_cols <- c("n_children", "n_adults", "hh_age", "hh_educ")
forimp <- forimp |> mutate(pwt = ifelse(pwt == 0, NA, pwt))
final_forimp <- forimp[,c("pid", "year", id_cols, logs_cols, noms_cols, ords_cols)]
along.out <- amelia(final_forimp, m = 5, p2s = 1,
                    idvars = id_cols,
                    ts = "year", cs = "pid",
                    ords = ords_cols,
                    noms = noms_cols,
                    logs = logs_cols,
                    polytime = 1)








  
  
  
  
